*******************************************************************
*Replication code for
*	Article: How COVID-19 affects voting for incumbents: evidence from local elections in France
*	Journal: PlosOne
*	Authors: Davide Morisi, Héloïse Cloléry, Guillaume Kon Kam King, and Max Schaub
*******************************************************************

* Start from a clean session: empty memory, close any open log (capture
* suppresses the error when no log is open) and switch off -more- pauses.
clear all
cap log close
set more off

*cd "[your working directory]"


*** Paths (relative to the working directory)
global pathdo "do_files"
global pathdata "data"
global pathtables "tables"
global pathfigures "figures"

* Open the raw dataset.
* A forward slash is used as directory separator so that the do-file runs on
* Windows, macOS and Linux alike (a backslash only works on Windows and is
* also Stata's macro-escape character).
use "${pathdata}/Covid19_Incumbency_Dataset.dta", clear


*******************
* VARIABLE LABELS FOR THE CONTROLS, PLUS RESCALED PREVALENCE AND MORTALITY

* Demographic composition
lab var MaleToFemaleRatio "Male/female ratio"
lab var shareAbove65 "Share over 65"
lab var shareAbove75 "Share over 75"
lab var shareAbove80 "Share over 80"
lab var shareImmigrants "Share immigrants"

* Labour market, income and welfare
lab var shareBlueCollar "Share blue collar"
lab var shareUnemp "Unemployment rate"
lab var MedianStandardLiving "Median household income"
lab var PovertyRate "Poverty rate"
lab var ShareWelfare "Share living off welfare"

* Urbanisation
lab var PopDensity "Population density"

* COVID-19 attack rate at the 1st round, original scale and multiplied by 100
generate prevalence1_100 = prevalence1 * 100
lab var prevalence1 "COVID-19 attack rate, 1st round of elections"
lab var prevalence1_100 "COVID-19 attack rate (times 100), 1st round of elections"

* Baseline mortality at the 1st round, original scale and multiplied by 1,000
generate baseline_mortality_both1_1000 = baseline_mortality_both1 * 1000
lab var baseline_mortality_both1 "Baseline mortality, 1st round of elections"
lab var baseline_mortality_both1_1000 "Baseline mortality (times 1,000), 1st round"


********************
* TURNOUT
* For each election year: voters divided by registered voters, then a
* detailed summary of the resulting share.
foreach yr in 2014 2020 {
	generate Turnout`yr' = Voters_`yr' / Registered_`yr'
	lab var Turnout`yr' "Turnout `yr' (share of eligible voters who went to vote)"
	summarize Turnout`yr', detail
}


**************
* DEPENDENT VARIABLES

*** CANDIDATE votes
* Incumbent indicator
fre Incumbent // In 54% of municipalities, incumbents ran again for office; this is our main sample of interest
* 1 = "yes", 0 = "no", missing for any other value of Incumbent
generate Incumbents_sample = cond(Incumbent == "yes", 1, cond(Incumbent == "no", 0, .))
lab var Incumbents_sample "Sample of 2014 mayors running again for office in 2020 (incumbents)"

* Change in the incumbent's vote share between 2014 and 2020,
* defined only for the incumbents sample
cap drop inc_vote_gap
generate inc_vote_gap = vote_share_Incumbent_2020 - vote_share_Incumbent_2014 if Incumbents_sample == 1
lab var inc_vote_gap "Incumbent vote diff in 2020 vs 2014 (positive=increase in 2020)"
summarize inc_vote_gap, detail // positive value = increase of votes in 2020 compared to 2014

* POLITICAL AFFILIATION OF INCUMBENTS
fre IncumbentList // this is the affiliation of the mayor in 2014
fre IncumbentList if Incumbents_sample==1 // the affiliation of the incumbents in 2020

* Legend of the list codes used below
* Left:
*   LCOM > communist party, radical
*   LFG  > front de gauche, left
*   LPG  > parti de gauche
*   LSOC > parti socialiste, left (mainstream)
*   LUG  > union de la gauche
*   LDVG > diverse left
*   LVEC > Europe-Ecologie-Les Verts, left
* Centre / unclear:
*   LDIV > diverse, no affiliation
*   LMDM > modem, centre
*   LUC  > union de centre
*   LUDI > Union des démocrates et indépendants, centre
* Right:
*   LLR  > les republicains, right (mainstream)
*   LUD  > union de la droite
*   LDVD > diverse right
*   LRN  > rassemblement national, (FN), extreme right
*   LEXD > extreme right

* Each dummy below is 1 when the list code belongs to the group, 0 for the
* remaining incumbents, and missing outside the incumbents sample.

* Left-wing incumbent
generate incumbent_left = inlist(IncumbentList, "LCOM", "LFG", "LPG", "LSOC", "LUG", "LDVG", "LVEC") if Incumbents_sample == 1
lab def incumbent_left 0 "Other party incumbent" 1 "Left-wing incumbent", modify
lab val incumbent_left incumbent_left
lab var incumbent_left "Incumbent from political left"
fre incumbent_left

* Centre / unclear affiliation
generate incumbent_centre = inlist(IncumbentList, "LDIV", "LMDM", "LUC", "LUDI") if Incumbents_sample == 1
lab def incumbent_centre 0 "Other party incumbent" 1 "Centre/unclear affiliation", modify
lab val incumbent_centre incumbent_centre
lab var incumbent_centre "Incumbent from the centre/with no clear affiliation"
fre incumbent_centre

* Right-wing incumbent
generate incumbent_right = inlist(IncumbentList, "LDVD", "LEXD", "LLR", "LRN", "LUD") if Incumbents_sample == 1
lab def incumbent_right 0 "Other party incumbent" 1 "Right-wing incumbent", modify
lab val incumbent_right incumbent_right
lab var incumbent_right "Incumbent from political right"
fre incumbent_right

* Radical incumbents (left or right) > only 1.3%
generate incumbent_ext = inlist(IncumbentList, "LCOM", "LFG", "LPG", "LRN", "LEXD") if Incumbents_sample == 1
lab def incumbent_ext 0 "Other party incumbent" 1 "Radical incumbent", modify
lab val incumbent_ext incumbent_ext
lab var incumbent_ext "Incumbent affiliated to radical party"
fre incumbent_ext

* Three-category affiliation: 1 = left, 2 = centre/unclear, 3 = right.
* Observations matching none of the three patterns (including every
* observation where the dummies are missing) are left missing.
generate incumbent3cat = cond(incumbent_left == 1 & incumbent_centre == 0 & incumbent_right == 0, 1, cond(incumbent_left == 0 & incumbent_centre == 1 & incumbent_right == 0, 2, cond(incumbent_left == 0 & incumbent_centre == 0 & incumbent_right == 1, 3, .)))
lab def incumbent3cat 1"left" 2"centre/unclear"  3"right", replace
lab val incumbent3cat incumbent3cat
lab var incumbent3cat "Political affiliation of incumbents in 2020"
fre incumbent3cat


**********************
* COVID-19 PREVALENCE

* PREVALENCE 1
summarize prevalence1_100, detail
* Log transform, then min-max rescale to the unit interval. The minimum and
* maximum are read from the results stored by -summarize- instead of being
* held in temporary variables.
generate lprevalence1_100 = log(prevalence1_100)
summarize lprevalence1_100
generate lprevalence1_norm = (lprevalence1_100 - r(min)) / (r(max) - r(min))
summarize lprevalence1_norm

* Additional measure of prevalence at the 1st round (different time window),
* rescaled (times 100), logged and min-max normalised in the same way
generate prevalence_4w_1st_rnd_100 = prevalence_4w_1st_rnd * 100
generate lprevalence_4w_1st_rnd_100 = log(prevalence_4w_1st_rnd_100)
quietly summarize lprevalence_4w_1st_rnd_100, meanonly
generate lprevalence4w_norm = (lprevalence_4w_1st_rnd_100 - r(min)) / (r(max) - r(min))
summarize lprevalence4w_norm


*********************
* RECODE CONTROL VARIABLES (logged transformations)

gen log_PopDensity = log(PopDensity)
gen log_MaleToFemaleRatio = log(MaleToFemaleRatio)

gen log_shareImmigrants = log(shareImmigrants)
* log(0) is missing, so municipalities with a zero share of immigrants (83 in
* the original data) are set to the smallest observed value of the logged
* variable. The replacement is restricted to shareImmigrants==0 so that
* observations whose share is itself missing stay missing, and the minimum is
* read from -summarize- so that no helper variable is left in the dataset
* that is saved at the end of this do-file.
quietly summarize log_shareImmigrants, meanonly
replace log_shareImmigrants = r(min) if shareImmigrants == 0

* Remaining controls: plain log transformations
gen log_shareBlueCollar = log(shareBlueCollar)
gen log_shareUnemp = log(shareUnemp)
gen log_shareAbove65 = log(shareAbove65)
gen log_shareAbove75 = log(shareAbove75)
gen log_shareAbove80 = log(shareAbove80)
gen log_education2 = log(Education2)
gen log_education5 = log(Education5)
gen log_baseline_mortality = log(baseline_mortality_both1)


********************
* 2014 census data
sum shareImmigrants2014 shareUnemp2014 shareBlueCollar2014 MedianStandardLiving2014

* Income
gen MedianStandardLiving100 = MedianStandardLiving/100
label var MedianStandardLiving100 "Median household income in hundreds of euros"
* Change in median household income relative to 2014
gen income_change = MedianStandardLiving - MedianStandardLiving2014
gen income_change100 = income_change/100 // change in hundreds of euros

* Change in unemployment relative to 2014.
* -summarize- takes a variable list, not an expression (a hyphen inside a
* varlist denotes a range of variables), so the difference is generated
* first and summarised afterwards.
gen unempl_change = shareUnemp - shareUnemp2014
sum unempl_change, d
gen unempl_change100 = unempl_change*100 // in percentages
sum unempl_change100, d // positive = unemployment went up in 2017 relative to 2014


*************
* Merge IPSOS data

* Create population categories in line with the IPSOS categories
/*AGGLO5
	1	Rural
	2	2,000 to 19,999 inhabitants
	3	20,000 to 99,999 inhabitants
	4	100,000 inhabitants and more
	5	Paris agglomeration
*/
* List of departments related to the Paris agglomeration:
* 75, 77, 78, 91, 92, 93, 94, 95
* NOTE(review): only department "75" is coded as Paris agglomeration below;
* confirm whether the other departments listed above should be included.
sum Pred_Population2020
gen size = 10 if Pred_Population2020<2000 & DepartmentCode!="75"
replace size = 20 if (Pred_Population2020>=2000 & Pred_Population2020<20000) & DepartmentCode!="75"
replace size = 30 if (Pred_Population2020>=20000 & Pred_Population2020<100000) & DepartmentCode!="75"
* Stata treats missing values as larger than any number, so the open-ended
* top category must exclude them explicitly; otherwise municipalities with
* unknown population would be classified as 100,000 and more.
replace size = 40 if Pred_Population2020>=100000 & !missing(Pred_Population2020) & DepartmentCode!="75"
replace size = 50 if DepartmentCode=="75"
fre size

* Merge at department level: many municipalities to one IPSOS record.
* A forward slash is used in the path so the do-file runs on every platform.
clonevar depcode_2digit = DepartmentCode
merge m:1 depcode_2digit using "${pathdata}/ipsos_anxiety_collapse_dep.dta"
fre DEP if _merge==2 // missing data from Corsica, 20=Corse-du-Sud, 96=Haute-Corse
* Drop IPSOS-only records that match no municipality, then the merge marker
drop if _merge==2
drop _merge


***************
* Number of candidates
* Row total over all variables matching N_candidates*_2020.
* NOTE(review): the pattern requires names ending in _2020, so it does not
* match N_candidates_2020_1st used below; confirm which variables are meant.
egen total_candidates = rowtotal(N_candidates*_2020)
fre total_candidates N_candidates_2020_1st // 37% of cities with only one candidate
* Three categories: 1, 2, and 3 or more candidates; 0 is set to missing.
* The upper bound is written as max rather than a hard-coded 14 so that every
* count of three or more is collapsed into the top category.
recode N_candidates_2020_1st (0 = .) (3/max = 3), gen(N_candidates_3cat)
fre N_candidates_3cat



************
* Save the recoded dataset, overwriting any previous version. A forward
* slash is used as directory separator so the path works on every platform.
save "${pathdata}/main_dataset_recoded.dta", replace
